# -------------------------------------------------------------------
# Purpose: Creates Figure B13
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate both panels of the figure
# -------------------------------------------------------------------
# Check that required paths exist
stopifnot(dir.exists(pdataanalysis))
stopifnot(dir.exists(poutputappendix))


## Load data
load(file.path(pdataanalysis, "countyLevel18501940.RData"))


## Subset data to 1850 counties
temp <- countyLevel18501940[year == 1850, .(gisjoin_1900, wgt1850 = n_adjp)]
d1 <- merge(countyLevel18501940, temp, by = "gisjoin_1900")
d1 <- d1 %>% select(gisjoin_1900, year, everything())
d1 <- plm::make.pbalanced(d1, balance.type = c("shared.individuals"))


## Winsorize and normalize variables
vars <- c("sum_patents_pc_i", "sum_break_p80_rrfsim05_pc_i")
d1[, paste0(vars, "_w") := lapply(.SD, function(x) winsorize(x, probs = c(0.05, 0.95))), .SDcols = vars]

vars <- c("entropy_namelast_mp_adjp", "n_sum_namelast_adjp")
d1[, paste0(vars, "_w") := lapply(.SD, function(x) winsorize(x, probs = c(.01, .99))), .SDcols = vars]
vars <- c("entropy_namelast_mp_adjp_w", "n_sum_namelast_adjp_w")
d1 <- d1[, paste0(vars, "s") := lapply(.SD, scale), .SDcols = vars]


# Left panel: Patents
mod <- feols(sum_patents_pc_i_w ~ entropy_namelast_mp_adjp_ws:i(year) + n_sum_namelast_adjp_ws | gisjoin_1900 + year^statefip, d1, cluster = ~statefip)
mod <- tidy(mod, conf.int = TRUE)
mod <- mod %>% 
    mutate(year := as.numeric(str_extract(term, "\\d{4}$"))) %>% 
    filter(str_detect(term, "entropy_namelast_mp_adjp_ws"))
plotname <- file.path(poutputappendix, "figureB13left.pdf")
p <- ggplot(mod, aes(x = year, y = estimate)) +
    geom_point() +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 1) +
    scale_x_continuous(breaks = seq(1850, 1940, 10)) +
    labs(
        x = "Surname diversity x Period dummy", y = "Coefficient (95% CI)",
        subtitle = "Patents per 10,000 people",
    ) +
    theme_minimal()
ggsave(plotname, p, width = 4.6, height = 4.6, units = "in")

cat("Figure saved to:", plotname, "\n")


# Right panel: Breakthrough patents
mod <- feols(sum_break_p80_rrfsim05_pc_i_w ~ entropy_namelast_mp_adjp_ws:i(year) + n_sum_namelast_adjp_ws | gisjoin_1900 + year^statefip, d1, cluster = ~statefip)
mod <- tidy(mod, conf.int = TRUE)
mod <- mod %>%
    mutate(year := as.numeric(str_extract(term, "\\d{4}$"))) %>%
    filter(str_detect(term, "entropy_namelast_mp_adjp_ws"))
plotname <- file.path(poutputappendix, "figureB13right.pdf")
p <- ggplot(mod, aes(x = year, y = estimate)) +
    geom_point() +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 1) +
    scale_x_continuous(breaks = seq(1850, 1940, 10)) +
    labs(
        x = "Surname diversity x Period dummy", y = "Coefficient (95% CI)",
        subtitle = "Breakthrough patents per 10,000 people",
    ) +
    theme_minimal()
ggsave(plotname, p, width = 4.6, height = 4.6, units = "in")

cat("Figure saved to:", plotname, "\n")
